Setup & Infile

Column

knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(ggplot2)
library(readxl)
library(stringr)
library(lubridate)
library(plotly)
# Load the raw responses, keeping text columns as character rather than factor.
# NOTE(review): `path` holds Korean text; confirm the CSV encoding matches the
# session locale (or pass `fileEncoding`) - TODO confirm.
dataset <- read.csv("dataset.csv", stringsAsFactors = FALSE)
# Show the column names and types of the loaded data frame.
str(dataset)
'data.frame':   245 obs. of  4 variables:
 $ X   : int  1 2 3 4 5 6 7 8 9 10 ...
 $ date: chr  "2018-02-07" "2018-02-07" "2018-02-07" "2018-02-08" ...
 $ acct: chr  "naver.com" "gmail.com" "gmail.com" "nate.com" ...
 $ path: chr  "페이스북" "블로그, 페이스북" "홈페이지" "페이스북" ...
# List the distinct account domains present in the raw data.
unique(dataset$acct)
 [1] "naver.com"           "gmail.com"           "nate.com"           
 [4] "allbr.co.kr"         "yahoo.com"           "hanmail.net"        
 [7] "daum.net"            "naver.con"           "afotrade.com"       
[10] NA                    "nvaer.com"           "hotmail.com"        
[13] "nomadconnection.com" "hanmauk.net"         "ajou.ac.kr"         
[16] "legalinsight.kr"    
# Normalise the account domain and fix column types in one pass:
#   * misspellings of naver.com are corrected;
#   * hanmail.net and its misspelling hanmauk.net are folded into daum.net;
#   * `acct` becomes a factor and `date` a Date.
# `%in%` never yields NA, so missing domains pass through unchanged.
dataset <- dataset %>%
  mutate(
    acct = replace(acct, acct %in% c("naver.con", "nvaer.com"), "naver.com"),
    acct = replace(acct, acct %in% c("hanmail.net", "hanmauk.net"), "daum.net"),
    acct = as.factor(acct),
    date = as.Date(date))

Column

# One logical column per channel, TRUE when the channel's keyword occurs in
# `path`. A single `path` value can name several channels at once
# (e.g. "블로그, 페이스북"), so the flags are not mutually exclusive.
# Keywords: Facebook, blog, homepage, acquaintance, search, cafe, Insta(gram).
channel_keywords <- c(
  fb     = "페이스북",
  blog   = "블로그",
  web    = "홈페이지",
  jiin   = "지인",
  search = "검색",
  cafe   = "카페",
  insta  = "인스타")
dataset[names(channel_keywords)] <- lapply(
  channel_keywords,
  function(keyword) str_detect(dataset$path, keyword))
# Number of responses per account domain, most frequent first.
dataset %>%
  group_by(acct) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
# A tibble: 12 x 2
   acct                count
   <fct>               <int>
 1 naver.com             120
 2 gmail.com              84
 3 daum.net               26
 4 nate.com                4
 5 allbr.co.kr             2
 6 yahoo.com               2
 7 <NA>                    2
 8 afotrade.com            1
 9 ajou.ac.kr              1
10 hotmail.com             1
11 legalinsight.kr         1
12 nomadconnection.com     1
# Bucket each account domain into a coarse provider type:
# naver / gmail / daum / other. A missing domain stays NA rather than being
# counted as "other"; case_when() would otherwise treat the NA comparison as
# FALSE and fall through to the default branch.
# "hanmail.net" is normally already rewritten to "daum.net" by the earlier
# cleanup; it is kept here so the bucketing is also correct on uncleaned data.
dataset <- dataset %>%
  mutate(acctType = case_when(
    is.na(acct) ~ NA_character_,
    acct == "naver.com" ~ "naver",
    acct == "gmail.com" ~ "gmail",
    acct %in% c("daum.net", "hanmail.net") ~ "daum",
    TRUE ~ "other"))
dataset$acctType <- as.factor(dataset$acctType)

# Per provider type, count the responses that mention each channel (the flags
# are logical, so sum() counts the TRUE values).
# Rows whose provider type is unknown are dropped by an explicit NA filter
# instead of a positional `[1:4, ]` slice, which only worked while there were
# exactly four non-NA groups and NA happened to sort last.
dataset_acctType <- dataset %>%
  group_by(acctType) %>%
  summarise(
    fb = sum(fb), blog = sum(blog), web = sum(web),
    jiin = sum(jiin), search = sum(search),
    cafe = sum(cafe), insta = sum(insta)) %>%
  filter(!is.na(acctType))

Time Series by month

Column

# Monthly totals per channel. The channel flags are logical, so sum() counts
# the responses in each (year, month) that mention the channel. `Date` is the
# first day of the month and serves as the x position in the plots.
dataset_month <- dataset %>%
  group_by(Y = year(date), M = month(date)) %>%
  summarise(
    fb = sum(fb), blog = sum(blog), web = sum(web),
    jiin = sum(jiin), search = sum(search),
    cafe = sum(cafe), insta = sum(insta)) %>%
  # summarise() only peels off the last grouping level; drop the remaining
  # grouping on Y so later steps operate on a plain table.
  ungroup() %>%
  mutate(Date = as.Date(paste(Y, M, 1, sep = "-")))

# One line per channel. Mapping `colour` to a constant string inside aes()
# gives every line its own legend entry.
# The `search` series was summarised above but previously never drawn; it is
# plotted here so the chart covers the same seven channels as the pie charts.
fig1 <- ggplot(dataset_month, aes(x = Date)) +
  geom_line(aes(y = fb, colour = "fb")) +
  geom_line(aes(y = blog, colour = "blog")) +
  geom_line(aes(y = web, colour = "web")) +
  geom_line(aes(y = jiin, colour = "jiin")) +
  geom_line(aes(y = search, colour = "search")) +
  geom_line(aes(y = cafe, colour = "cafe")) +
  geom_line(aes(y = insta, colour = "insta")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  # Put one tick on every month present in the data.
  scale_x_date(breaks = dataset_month$Date) +
  labs(x = "Month", y = NULL)

Column

# Convert the ggplot line chart to a plotly object and display it.
ggplotly(fig1)

Piechart - All time

Column

# All-time total per channel: column sums of the seven channel columns
# (positions 3 to 9 of dataset_month), one row per channel.
df <- data.frame(
  channel = names(dataset_month)[3:9],
  freq = colSums(dataset_month[, 3:9]),
  stringsAsFactors = FALSE)

# Pie chart (ref: M91, page 39): a single stacked bar of width 1 wrapped
# around the circle by mapping y to the polar angle.
fig2 <- ggplot(df, aes(x = "", y = freq, fill = factor(channel))) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y", start = 0) +
  labs(fill = "channel", x = NULL, y = NULL) +
  theme(
    axis.line = element_blank(),
    plot.title = element_text(hjust = 0.5))

Column

# Draw the all-time pie chart.
print(fig2)

Piechart - Facet by Month

Column

# Pie chart per month: each facet shows the channels' shares for one month.
# Faceted-pie approach adapted from
# https://stackoverflow.com/questions/25372055/how-can-i-use-facet-wrap-using-pie-charts
library(reshape2)

# Long format: one row per (Date, channel), with the monthly count in `value`
# and the channel name in `variable`.
dataset_month_long <-
  data.frame(Date = dataset_month$Date, dataset_month[, 3:9]) %>%
  melt(id = "Date")

# position_fill() rescales every facet's stack to [0, 1], so axis breaks
# computed from cumulative raw counts cannot line up with the slices. Slice
# labels are therefore drawn with geom_text() at the middle of each filled
# segment instead of through scale_y_continuous() breaks.
fig4 <-
  ggplot(dataset_month_long,
         aes(x = 1, y = value, fill = variable)) +
  geom_bar(stat = "identity",
           color = "black",
           position = position_fill()) +
  # `group = variable` keeps the label stacking order identical to the bars;
  # without it the discrete `label` would take part in the grouping.
  # Empty slices get an empty label so zero-width segments stay unlabelled.
  geom_text(aes(label = ifelse(value > 0, as.character(variable), ""),
                group = variable),
            position = position_fill(vjust = 0.5),
            size = 3) +
  coord_polar(theta = "y") +
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~ Date)

Column

# Draw the per-month faceted pie charts.
print(fig4)

Piechart - Facet by Account Type

Column

# Share of each channel within every provider type (rounded to 2 decimals),
# plus the row total of channel mentions, ordered by that total descending.
# Columns 2 to 8 of dataset_acctType are the seven channel counts.
dataset_acctType_proportion <- local({
  channel_counts <- dataset_acctType[, 2:8]
  mentions <- rowSums(channel_counts)
  shares <- data.frame(
    acctType = dataset_acctType$acctType,
    round(channel_counts / mentions, 2),
    subtotal = mentions)
  arrange(shares, desc(subtotal))
})
dataset_acctType_proportion
  acctType   fb blog  web jiin search cafe insta subtotal
1    naver 0.46 0.16 0.05 0.04   0.16 0.03  0.10      135
2    gmail 0.44 0.09 0.10 0.05   0.15 0.09  0.10       94
3     daum 0.38 0.14 0.00 0.00   0.24 0.10  0.14       29
4    other 0.79 0.00 0.07 0.14   0.00 0.00  0.00       14
# Frequency of each account domain (table() leaves out NA by default).
table(dataset$acct)

       afotrade.com          ajou.ac.kr         allbr.co.kr 
                  1                   1                   2 
           daum.net           gmail.com         hotmail.com 
                 26                  84                   1 
    legalinsight.kr            nate.com           naver.com 
                  1                   4                 120 
nomadconnection.com           yahoo.com 
                  1                   2 
# Long format: one row per (acctType, channel), with the count in `value` and
# the channel name in `variable`. melt() comes from reshape2.
dataset_acctType_long <-
  data.frame(acctType = dataset_acctType$acctType, dataset_acctType[, 2:8]) %>%
  melt(id = "acctType")

# Pie chart per provider type. position_fill() rescales every facet's stack
# to [0, 1], so axis breaks computed from cumulative raw counts cannot line up
# with the slices. Slice labels are therefore drawn with geom_text() at the
# middle of each filled segment instead of through scale_y_continuous() breaks.
fig5 <-
  ggplot(dataset_acctType_long,
         aes(x = 1, y = value, fill = variable)) +
  geom_bar(stat = "identity",
           color = "black",
           position = position_fill()) +
  # `group = variable` keeps the label stacking order identical to the bars;
  # without it the discrete `label` would take part in the grouping.
  # Empty slices get an empty label so zero-width segments stay unlabelled.
  geom_text(aes(label = ifelse(value > 0, as.character(variable), ""),
                group = variable),
            position = position_fill(vjust = 0.5),
            size = 3) +
  coord_polar(theta = "y") +
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~ acctType)

Column

# Draw the per-provider-type faceted pie charts.
print(fig5)